set.seed(2423045)
rm(list=ls())
library(Matching)
library(foreign)

source("AutoCluster3.R")

dta <- read.dta(file="ec675_nsw.dta")
dta$early.ra <- dta$early_ra
dta.rct <- dta[dta$sample==1,]
early.ra <- dta.rct[dta.rct$early.ra==1,]

benchmark <- mean(early.ra$re78[early.ra$treat==1])-mean(early.ra$re78[early.ra$treat==0])

cat("early RA RCT sample. Treated obs:",sum(early.ra$treat==1),"\n")
cat("early RA RCT sample. Control obs:",sum(early.ra$treat==0),"\n")
cat("early RA RCT sample. Benchmark:",benchmark,"\n")


load("rep.sourcecode.re74psid1.RData")

#treat.dta reorders the columns so that they can be stacked on top of foo
#                           "treat"     "age"      "education" "           black"       "hispan"       "married"        "nodegree"            "re74"      "re75"          "re78"
treat.dta <- cbind(early.ra$treated, early.ra$age, early.ra$educ, early.ra$black, early.ra$hisp, early.ra$married, early.ra$nodegree, early.ra$re74, early.ra$re75, early.ra$re78)
treat.dta <- as.data.frame(treat.dta)
names(treat.dta) <- names(foo)
treat.dta <- treat.dta[treat.dta$treat==1,]

#remove the DW treated and put in the early RA treated
foo.orig <- foo
foo <- foo.orig[foo.orig$treat==0,]
foo <- rbind(foo, treat.dta)

X <-   as.matrix(cbind( foo$age,
                        foo$educ,
                        foo$black,
                        foo$hispan,
                        foo$married,
                        foo$nodegree,
                        I(foo$re74/1000),
                        #I(as.real(foo$re74 == 0)),
                        I(foo$re75/1000)))# ,
                        #I(as.real(foo$re75 == 0))))
                 
BalanceMat <- as.matrix(cbind(
                     foo$age,
                     foo$educ,
                     foo$black,
                     foo$hispan,
                     foo$married,
                     foo$nodegree,
                     I(foo$re74/1000),
                     I(foo$re75/1000),
                     I((foo$re74/1000)^2),
                     I((foo$re75/1000)^2),
                         
                     I((foo$age)^2),
                     I((foo$educ)^2),
                     I((foo$black)^2),
                     I((foo$hispan)^2),
                     I((foo$married)^2),
                     I((foo$nodegree)^2),
                         
                     I(foo$age*foo$educ),
                     I(foo$age*foo$black),
                     I(foo$age*foo$hispan),
                     I(foo$age*foo$married),    
                     I(foo$age*foo$nodegree),
                     I(foo$age*(foo$re74/1000)),
                     I(foo$age*(foo$re75/1000)),

                     I(foo$educ*foo$black),
                     I(foo$educ*foo$hispan),
                     I(foo$educ*foo$married),    
                     I(foo$educ*foo$nodegree),
                     I(foo$educ*(foo$re74/1000)),
                     I(foo$educ*(foo$re75/1000)),

#                     I(foo$black*foo$hispan), This is always zero!
                     I(foo$black*foo$married),
                     I(foo$black*foo$nodegree),
                     I(foo$black*(foo$re74/1000)),
                     I(foo$black*(foo$re75/1000)),

                     I(foo$hispan*foo$married),
                     I(foo$hispan*foo$nodegree),
                     I(foo$hispan*(foo$re74/1000)),
                     I(foo$hispan*(foo$re75/1000)),

                     I(foo$married*foo$nodegree),
                     I(foo$married*(foo$re74/1000)),
                     I(foo$married*(foo$re75/1000)),
                                                
                     I(foo$nodegree*(foo$re74/1000)),
                     I(foo$nodegree*(foo$re75/1000)),                        

                     I((foo$re74/1000)*(foo$re75/1000))                                                 
                         ) )


# From Review of Econ and Statistics
model.A = foo$treat~I(foo$age) + I(foo$age^2) + I(foo$age^3) + I(foo$educ) + I(foo$educ^2) + I(foo$married) + I(foo$nodegree) + I(foo$black) + I(foo$hispan) + I(foo$re74) + I(foo$re75) + I(foo$re74 == 0) + I(foo$re75 == 0) + I(I(foo$re74)*I(foo$educ))

pscores.A <- glm(model.A, family = binomial)

X2 <- cbind(X,BalanceMat[,9:ncol(BalanceMat)]) #first 8 spots of BalanceMat are already in X
for (i in 1:ncol(X2))
  {
    lm1 = lm(X2[,i]~pscores.A$linear.pred)
    X2[,i] = lm1$residual
   }

# VARIABLES WE MATCH ON BEGIN WITH PROPENSITY SCORE
orthoX2.plus.pscore = cbind(pscores.A$linear.pred, X2)
orthoX2.plus.pscore[,1] <- orthoX2.plus.pscore[,1] -  mean(orthoX2.plus.pscore[,1])


# NORMALIZE ALL X COVARS BY STANDARD DEVIATION
#for (i in 1:(ncol(X)+1))
for (i in 1:ncol(X2))
  {
    orthoX2.plus.pscore[,i] <-
      orthoX2.plus.pscore[,i]/sqrt(var(orthoX2.plus.pscore[,i]))
  }

orthoX2.plus.pscore <- orthoX2.plus.pscore[,1:8] #use just the original X
#sv <- rep(1, ncol(orthoX2.plus.pscore))

#sv from earlyRA1.cps1.gm1.Rout
sv <- c(
        9.745892e+02, 3.736525e+02, 5.909875e+01, 2.843803e+00, 5.604069e+02, 3.276028e+02, 8.066618e+00, 1.916231e+02        
        )


for (s in 1:100)
  {
    cat("*S:",s,"**************************************************************\n")
    cl <- NCPUS()
    GM.out <-  GenMatch(Tr = foo$treat,
                        X = orthoX2.plus.pscore,
                        starting.values=sv,
                        BalanceMatrix = BalanceMat,
                        pop.size = 5000,
                        max.generations = 500,
                        hard.generation.limit = TRUE,
                        wait.generations = 20,
                        cluster=cl)
    stopCluster(cl)

    sv=diag(GM.out$Weight.matrix)
    cat("Starting Values:\n")
    write.csv(sv,eol=", ", row.names=FALSE)

    mout <- Match(Y=foo$re78,
                  Tr = foo$treat,
                  X = orthoX2.plus.pscore,
                  Weight.matrix=GM.out)
    summary(mout)
  }


